{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3c0a3eef-31e2-4928-b5b1-54efd47c8d41",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Name of the S3 bucket that holds the test data; replace the placeholder\n",
    "# with a real bucket name before running the cells that follow.\n",
    "s3_bucket = '<INSERT S3 BUCKET HERE>'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0932510b-db9d-4051-8c7b-466e34879435",
   "metadata": {},
   "outputs": [],
   "source": [
    "prefix = 'pipeline'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "eedbb6c3-f1dd-4e91-aff0-6a2696f53a80",
   "metadata": {},
   "outputs": [],
   "source": [
    "endpoint_name = \"AutoGluonEndpoint\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dde4ddb4-045c-4960-8299-ec3942a3260e",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sagemaker.predictor import Predictor\n",
    "\n",
    "import sagemaker"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5108f9fb-0d09-4d0c-bfd4-b97b49c7d7cb",
   "metadata": {},
   "outputs": [],
   "source": [
    "session = sagemaker.Session()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "63520c4d-812d-4f1e-889a-e72040c1de6a",
   "metadata": {},
   "outputs": [],
   "source": [
    "!mkdir -p tmp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cf529af0-cb38-466f-bd29-fe66e35f669f",
   "metadata": {},
   "outputs": [],
   "source": [
    "test_data_path = f\"s3://{s3_bucket}/{prefix}/output/test/data.csv\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d0d4b967-d07a-40dc-93e6-8ff3979488f9",
   "metadata": {},
   "outputs": [],
   "source": [
    "!aws s3 cp {test_data_path} tmp/test_data.csv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b0eb6e9e-4ede-40a3-817c-6af6142cc412",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "test_df = pd.read_csv(\"tmp/test_data.csv\", header=None)\n",
    "test_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1349e186-9569-467d-a0ed-fb59f0278af0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The CSV was read without a header row, so the first column has no\n",
    "# meaningful name yet; give it the name the helper functions look up.\n",
    "test_df.rename(columns={test_df.columns[0]: \"is_cancelled\"}, inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1bf6bb94-9606-47b4-a599-146973fd4917",
   "metadata": {},
   "outputs": [],
   "source": [
    "test_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "09fc5c98-5f4a-4d07-a6a4-ef5df49ba0fc",
   "metadata": {},
   "outputs": [],
   "source": [
    "predictor = Predictor(endpoint_name, session)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "520a526a-cba7-4235-9ff6-f50f97fdb0a7",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sagemaker.serializers import CSVSerializer\n",
    "from sagemaker.deserializers import JSONDeserializer\n",
    "\n",
    "# Send request payloads as CSV and parse endpoint responses as JSON.\n",
    "predictor.serializer = CSVSerializer()\n",
    "predictor.deserializer = JSONDeserializer()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a32fdb4f-7b92-49bc-831a-9360ea48e3f3",
   "metadata": {},
   "outputs": [],
   "source": [
    "import csv\n",
    "\n",
    "def get_test_payload(index, test_df=test_df):\n",
    "    \"\"\"Build the CSV payload for a single test record.\n",
    "\n",
    "    Drops the 'is_cancelled' column, selects the row at position `index`,\n",
    "    and serializes its values as one comma-separated line. Non-numeric\n",
    "    values are wrapped in double quotes.\n",
    "\n",
    "    Args:\n",
    "        index: Zero-based row position within `test_df`.\n",
    "        test_df: DataFrame containing an 'is_cancelled' column. The default\n",
    "            is bound to the notebook-level `test_df` when this cell runs.\n",
    "\n",
    "    Returns:\n",
    "        str: The record's values joined by commas.\n",
    "    \"\"\"\n",
    "    test_data = test_df.drop(['is_cancelled'], axis=1)\n",
    "    target_record = test_data.iloc[index]\n",
    "    # Series.to_csv emits one value per line, so split on line boundaries\n",
    "    # only. A bare split() also breaks on spaces inside quoted values and\n",
    "    # the join below would then turn those spaces into commas.\n",
    "    predictor_values = target_record.to_csv(\n",
    "        header=None,\n",
    "        index=False,\n",
    "        quotechar='\"',\n",
    "        quoting=csv.QUOTE_NONNUMERIC\n",
    "    ).splitlines()\n",
    "    csv_string = ','.join(predictor_values)\n",
    "    return csv_string"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f70453dd-b18b-416e-a9b8-96f2170be6b1",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_test_actual_result(index, test_df=test_df):\n",
    "    \"\"\"Return the 'is_cancelled' value of the row at position `index`.\n",
    "\n",
    "    The default for `test_df` is bound to the notebook-level `test_df`\n",
    "    when this cell runs.\n",
    "    \"\"\"\n",
    "    return test_df.iloc[index]['is_cancelled']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ceecd158-74c7-4513-aabf-93631160c2af",
   "metadata": {},
   "outputs": [],
   "source": [
    "def predict(index, predictor=predictor):\n",
    "    \"\"\"Invoke the endpoint for one test record and return the predicted class.\n",
    "\n",
    "    The CSV payload for row `index` is sent through `predictor`, the raw\n",
    "    response is printed, and its 'probabilities' entry is unpacked as a\n",
    "    single pair of values.\n",
    "\n",
    "    Returns:\n",
    "        int: 0 when the first probability is strictly greater than the\n",
    "        second, otherwise 1.\n",
    "    \"\"\"\n",
    "    response = predictor.predict([get_test_payload(index)])\n",
    "    print(response)\n",
    "    [[p0, p1]] = response['probabilities']\n",
    "\n",
    "    return 0 if p0 > p1 else 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f72d33d1-cec5-450a-9fdb-c913d085ef93",
   "metadata": {},
   "outputs": [],
   "source": [
    "predict(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fd4ea5b0-5d45-4c12-8eb8-2208716aa843",
   "metadata": {
    "scrolled": true,
    "tags": []
   },
   "outputs": [],
   "source": [
    "from time import sleep\n",
    "\n",
    "# Score at most 100 records, capped at the size of the test set so that\n",
    "# the positional lookups never run past the last row.\n",
    "sample_count = min(100, len(test_df))\n",
    "\n",
    "actual_list = []\n",
    "predicted_list = []\n",
    "\n",
    "for i in range(sample_count):\n",
    "    actual = get_test_actual_result(i)\n",
    "    predicted = predict(i)\n",
    "    print(f\"[iteration # {i}]\")\n",
    "    print(f\"actual = {actual}; predicted = {predicted}\")\n",
    "\n",
    "    actual_list.append(actual)\n",
    "    predicted_list.append(predicted)\n",
    "\n",
    "    # Brief pause between consecutive endpoint requests.\n",
    "    sleep(0.05)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "763250f0-ce61-4538-b6bb-2cb2a505f344",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import classification_report\n",
    "\n",
    "# Pin the label order so the two target names always line up with classes\n",
    "# 0 and 1, even when the scored records happen to contain only one class\n",
    "# (without `labels`, the report rejects a target_names list of the wrong size).\n",
    "target_names = ['not cancelled', 'cancelled']\n",
    "print(classification_report(\n",
    "    actual_list,\n",
    "    predicted_list,\n",
    "    labels=[0, 1],\n",
    "    target_names=target_names\n",
    "))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0f0f9ef5-80c5-4644-8d8f-abd4e12819e0",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import accuracy_score\n",
    "\n",
    "accuracy_score(actual_list, predicted_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "67d4eaf3-dd14-4f2d-8457-d08653df724d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Clean up: delete the endpoint this predictor is attached to.\n",
    "predictor.delete_endpoint()"
   ]
  }
 ],
 "metadata": {
  "instance_type": "ml.t3.medium",
  "kernelspec": {
   "display_name": "Python 3 (Data Science)",
   "language": "python",
   "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-west-2:236514542706:image/datascience-1.0"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}